# -*- coding: utf-8 -*-
# @Function: 初始化jieba分词词典

import jieba
import os

def _read_terms(path, label):
    """Return the non-blank, whitespace-stripped lines of a term file.

    Args:
        path: Path of the text file to read, one term per line.
        label: Human-readable name of the term list, used only in the
            failure message.

    Returns:
        A list of terms in file order. An empty list is returned when the
        file cannot be opened or decoded; the failure is reported on stdout
        rather than raised, so a missing word list does not abort start-up.
    """
    try:
        # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading BOM,
        # which would otherwise stay glued to the first term as '\ufeff'.
        with open(path, 'r', encoding='utf-8-sig') as f:
            return [term for term in (line.strip() for line in f) if term]
    except (OSError, UnicodeError) as e:
        print(f"读取{label}文件失败: {e}")
        return []


def init_jieba_dict():
    """Register the core and general term lists with jieba.

    The term files ``core_terms.txt`` and ``general_terms.txt`` are looked up
    in the ``keyword_utils`` directory that sits one level above the
    directory containing this file. Each non-blank line is passed to
    ``jieba.add_word``. A file that cannot be read contributes no terms and
    its failure is printed. A summary of how many terms were loaded from
    each file is printed at the end.

    Returns:
        None.
    """
    # Directory one level above the directory that contains this file.
    parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    terms_dir = os.path.join(parent_dir, 'keyword_utils')

    core_terms = _read_terms(os.path.join(terms_dir, 'core_terms.txt'), '核心术语')
    general_terms = _read_terms(os.path.join(terms_dir, 'general_terms.txt'), '一般术语')

    # Core terms are registered first, then general terms.
    for term in core_terms + general_terms:
        jieba.add_word(term)

    print(f"已添加 {len(core_terms)} 个核心术语和 {len(general_terms)} 个一般术语到jieba词典")

# Script entry point: load the custom terms only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    init_jieba_dict()